Projet Chamois

1 Chargement des librairies


library(tidyverse)
library(corrplot)
library(lmerTest)
library(ade4)
library(splines)
library(car)
library(plotly)
library(DT)
library(Hmisc)

2 Prealable


2.1 Import des donnees

setwd(".")
load('cham.Rdata')
datatable(cham, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )

2.2 Elimination des donnees aberrantes

Les chamois observés après leur mort ou avant leur naissance sont retirés du jeu de donnees.

cham <- cham[cham$year<=cham$ydth,]
cham <- cham[cham$year>=cham$coh,]

2.3 Creation des variables age et longevite (long)

cham2 <- cham %>%
  summarise(cham, age= year-coh, long=ydth-coh)

3 Lien fecondite annuelle et age des femelles


3.1 Représentation graphique des données

3.1.1 Représentation par classe d’age

cham_age <- cham2 %>% 
  group_by(age) %>%
  dplyr::summarise(totnaissance= sum(fec), taillepop=n(), fecmean=totnaissance/n())

cham_age$fecmean <- round(cham_age$fecmean, 2)

  plot1 <- ggplot(cham_age, aes(x=age, y=fecmean)) + 
    geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity") + 
    labs(title = "Fécondité moyenne de la population en fonction de l'age",x="Age", y="Fécondité moyenne de la population") + 
    theme(plot.title = element_text(hjust = 0.5)) + 
    geom_smooth()
ggplotly(plot1)

3.1.2 Representation sans grouper par classe d’age

3.1.2.1 Utilisation de la fonction jitter

plot2 <- ggplot(cham2, aes(x=age, y=fec)) + 
    geom_jitter(width = 0.55, height = 0) + 
    labs(title = "Fécondité annuelle en fonction de l'age",x="Age", y="Fécondité annuelle") +
    theme(plot.title = element_text(hjust = 0.5)) + 
    geom_smooth()
ggplotly(plot2)

3.1.2.2 Utilisation de la fonction geom_count

plot3 <- ggplot(cham2, aes(x=age, y=fec)) +
    geom_count() + 
    labs(title = "Fécondité annuelle en fonction de l'age",x="Age", y="Fécondité annuelle")+
    theme(plot.title = element_text(hjust = 0.5))+
    geom_smooth(); plot3

3.2 Analyse statistique du lien entre fecondite annuelle et l’age des femelles

3.2.1 Modèles de régression lineaire generalise avec effets aléatoires


3.2.1.1 First

Le premier modele applique un modele glm1 qui utilise la fonction de lien binomiale et la variable “id” comme effet aléatoire pour prendre en compte le fait que les observations sont repetees sur les mêmes individus sur plusieurs annees.

glm1 <- glmer(fec ~ age + (1| id),data=cham2, family = binomial)
summary(glm1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1212.3   1226.7   -603.2   1206.3      906 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9150 -1.0621  0.6421  0.7819  1.1219 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.3319   0.5761  
## Number of obs: 909, groups:  id, 163
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.83210    0.20527   4.054 5.04e-05 ***
## age         -0.04723    0.01946  -2.426   0.0152 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## age -0.905
Anova(glm1)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##      Chisq Df Pr(>Chisq)  
## age 5.8874  1    0.01525 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
surdispersion_glm1 <- 1212/906;surdispersion_glm1
## [1] 1.337748

Interpretation des coefficients:

summary(glm1)$coefficients
##                Estimate Std. Error   z value     Pr(>|z|)
## (Intercept)  0.83209798  0.2052743  4.053590 5.043751e-05
## age         -0.04722673  0.0194638 -2.426388 1.524995e-02
exp(summary(glm1)$coefficients[2])
## [1] 0.9538711
Coeff <- ((1/exp(summary(glm1)$coefficients[2]))-1)*100

Avec le modele glm1, AIC/df = 1.3 donc il n’y a pas de surdispersion observee. Il est 4.8359681% moins vraisemblable que les chamois aient un petit lorsque leur age augmente d’un an (p-value = 0.0152).

3.2.1.2 Second

On ajoute la variable “year” en effet additif ou multiplicatif car il pourrait y avoir un effet “annee” dans la tendance observee sur la fecondite annuelle.

glm2 <- glmer(fec ~ age+ year + (1| id),data=cham2, family = binomial)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.166371 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?

La variable “year” doit être centree normee pour pouvoir faire converger le modele.

year_scale<-scale(cham2$year, center=TRUE, scale= TRUE)
glm2 <- glmer(fec ~ age+ year_scale + (1| id),data=cham2, family = binomial)
summary(glm2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age + year_scale + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1212.4   1231.7   -602.2   1204.4      905 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0424 -1.0533  0.6394  0.7823  1.1856 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.327    0.5719  
## Number of obs: 909, groups:  id, 163
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.75566    0.21150   3.573 0.000353 ***
## age         -0.03888    0.02023  -1.922 0.054627 .  
## year_scale  -0.11728    0.08562  -1.370 0.170774    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) age   
## age        -0.911       
## year_scale  0.253 -0.291
glm3 <- glmer(fec ~ age*year_scale + (1| id),data=cham2, family = binomial)
summary(glm3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age * year_scale + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1212.3   1236.4   -601.2   1202.3      904 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9099 -1.0508  0.6212  0.7703  1.2340 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.398    0.6309  
## Number of obs: 909, groups:  id, 163
## 
## Fixed effects:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     0.78482    0.21836   3.594 0.000325 ***
## age            -0.04011    0.02080  -1.929 0.053787 .  
## year_scale      0.16827    0.21931   0.767 0.442931    
## age:year_scale -0.02916    0.02058  -1.417 0.156513    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) age    yr_scl
## age         -0.909              
## year_scale   0.202 -0.171       
## age:yer_scl -0.100  0.048 -0.916

On n’observe pas de surdispersion pour les deux modeles testes. Que ce soit en effet mutiplicatif ou additif, la variable “year” n’a pas un effet significatif en addition à la variable “age” (p value > 0.1 en effet additif et multiplicatif). La variable “age” a un effet plus marque que la variable “year” avec une p value proche de 0.5.

3.2.1.3 Third

Test du modele avec deux effets aleatoires (variables “id” et “year”)

glm4<- glmer(fec ~ age + (1| id) + (1| year),data=cham2, family = binomial)
summary(glm4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age + (1 | id) + (1 | year)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1212.2   1231.5   -602.1   1204.2      905 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0185 -1.0136  0.6258  0.7816  1.2299 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.34280  0.5855  
##  year   (Intercept) 0.07463  0.2732  
## Number of obs: 909, groups:  id, 163; year, 26
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.82199    0.21943   3.746  0.00018 ***
## age         -0.04585    0.01985  -2.310  0.02089 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##     (Intr)
## age -0.867
Anova(glm4)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##      Chisq Df Pr(>Chisq)  
## age 5.3362  1    0.02089 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
surdispersion_glm4 <- 1212/905;surdispersion_glm4
## [1] 1.339227

Les resultats du modele glm4 sont similaires à ceux du modele glm1 (AIC, df, p value et coefficients similaires) donc le fait d’ajouter la variable “year” comme effets aléatoires n’apportent pas plus d’explication sur la variance observee.

4 Variation de la fecondite annuelle en fonction du temps


4.1 Représentation graphique des données

4.1.1 Représentation graphique par annee

cham_ans = cham2 %>% 
  group_by(year) %>% 
  dplyr::summarise(totnaissance= sum(fec), taillepop=n(), agemoyen=mean(age)) %>% 
  mutate(fecperan=totnaissance/taillepop)
cham_ans$fecperan <- round(cham_ans$fecperan, 2)
plot4 <- ggplot(cham_ans, aes(x=year, y=fecperan)) +
    geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity") + 
    labs(title = "Fécondité moyenne de la population en fonction des annees",x="Annees", y="Fécondité moyenne") +
    theme(plot.title = element_text(hjust = 0.5)) +
    geom_smooth(); plot4

plot5 <- ggplot(data = cham_ans, aes(x = year,y=agemoyen))+
     labs(title = "Age moyen de la population en fonction des annees",x="Annees", y="Age moyen") +
    theme(plot.title = element_text(hjust = 0.5)) +
    geom_point()+
    geom_smooth(); plot5

4.1.2 Représentation graphique sans grouper par annee

plot6 <- ggplot(cham2, aes(x=year, y=fec)) + 
    labs(title = "Fécondité annuelle en fonction des annees",x="Annees", y="Fécondité annuelle") +
    geom_jitter(width = 0.55, height = 0) +
    geom_smooth()+
    theme(plot.title = element_text(hjust = 0.5)); plot6

plot7 <- ggplot(cham2, aes(x=year, y=fec)) + 
    geom_count() + 
    labs(title = "Fécondité annuelle en fonction des annees",x="Annees", y="Fécondité annuelle") +
    theme(plot.title = element_text(hjust = 0.5))+
    geom_smooth(); plot7

plot8 <- ggplot(data = cham_ans, aes(x = year,y=agemoyen))+
   labs(title = "Age moyen de la population en fonction des annees",x="Annees", y="Age moyen") +
    theme(plot.title = element_text(hjust = 0.5)) +
    geom_point()+
    geom_smooth(); plot8

4.2 Analyse statistique du lien entre fécondité annuelle et temps

4.2.1 Modèles de régression lineaire generalise avec effets aléatoires


4.2.1.1 First

Le premier modele applique un modele glm5 qui utilise la fonction de lien binomiale et la variable “id” comme effet aléatoire pour prendre en compte le fait que les observations sont repetees sur les mêmes individus sur plusieurs annees.

glm5 <- glmer(fec ~ year_scale + (1| id),data=cham2, family = binomial)
summary(glm5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ year_scale + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1214.1   1228.6   -604.1   1208.1      906 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8977 -1.0434  0.6474  0.7799  1.1928 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.3358   0.5794  
## Number of obs: 909, groups:  id, 163
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.38601    0.08734    4.42 9.89e-06 ***
## year_scale  -0.16692    0.08263   -2.02   0.0434 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## year_scale -0.028
Anova(glm5)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##             Chisq Df Pr(>Chisq)  
## year_scale 4.0807  1    0.04337 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
surdispersion_glm5 <- 1214/906;surdispersion_glm5
## [1] 1.339956

Pas de surdispersion observee. D’apres la p-value<0.05, il y a aurait un effet significatif de la variable “year” sur la fecondite annuelle. Or, graphiquement, l’effet de la variable “year” semble negligeable. On se rend compte que l’age moyen de la population augmente avec les annees et on sait grace au modele glm1 que la variable “age” a un effet significatif sur la fecondite annuelle. Ainsi, il est important de regarder le modele glm3 qui presente les effets multiplicatids “year” et “age”.

glm3 <- glmer(fec ~ age*year_scale + (1| id),data=cham2, family = binomial)
summary(glm3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ age * year_scale + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1212.3   1236.4   -601.2   1202.3      904 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9099 -1.0508  0.6212  0.7703  1.2340 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.398    0.6309  
## Number of obs: 909, groups:  id, 163
## 
## Fixed effects:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     0.78482    0.21836   3.594 0.000325 ***
## age            -0.04011    0.02080  -1.929 0.053787 .  
## year_scale      0.16827    0.21931   0.767 0.442931    
## age:year_scale -0.02916    0.02058  -1.417 0.156513    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) age    yr_scl
## age         -0.909              
## year_scale   0.202 -0.171       
## age:yer_scl -0.100  0.048 -0.916

La variable “age” a un effet avec une p value proche de 0.5 alors que les p value associees à la variable “year” et aux effets multiplicatifs ne sont pas significatifs (p valu>0.1). La variable “year” n’a donc pas d’effet significatif sur la fecondite annuelle des chamois.

5 Lien entre fecondite totale et longevite des animaux


5.1 Représentation graphique des données

cham_id = cham2 %>% 
  group_by(id) %>% 
  dplyr::summarise(feconditetotale= sum(fec), long=long, pds=pds, anneetot=(ydth-min(year)+1), MinAn=min(year), MaxAn=max(year), AgePds=min(age)) %>%
  unique()%>%
  mutate(fectotrelative=feconditetotale/anneetot)%>%
  drop_na(long)
plot9 <-ggplot(cham_id, aes(x=long, y=feconditetotale)) +
    geom_count() +
    labs(title = "Fecondite totale en fonction de la longevite",x="Longevite", y="Fecondite totale") +
    theme(plot.title = element_text(hjust = 0.5)) +
    geom_smooth(); plot9

plot10 <- ggplot(cham_id, aes(x=long, y=feconditetotale)) +
    geom_jitter(width = 0.25, height = 0.25)+
     labs(title = "Fecondite totale en fonction de la longevite",x="Longevite", y="Fecondite totale") +
    theme(plot.title = element_text(hjust = 0.5)) +
    geom_smooth(); plot10

Tous les chamois n’ont pas été suivis le même nombre d’annee. Pour prendre en compte ce biais dans la fecondite totale des chamois, la fecondite totale a ete divisee par le nombre d’annees de suivi.

plot11 <- ggplot(data = cham_id, aes(x=long, y=fectotrelative))+
    geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) +
    labs(title = "Fecondite sur le nombre d'annees suivies en fonction de la longevite",x="Longevite", y="Fecondite relative au nombre d'annees suivies") +
    geom_smooth() +
    theme(plot.title = element_text(hjust = 0.5)); plot11

5.2 Analyse statistique du lien entre la fécondité annuelle et la longevite

5.2.1 Tests de modèles de régression lineaire generalise avec effets aléatoires


5.2.1.1 First => Voir avec Karim pour questions. Pas l’impression de pouvoir deduire de relation quand prise en compte du nombre d’annees de suivi dans la relation

test1 <- lm(feconditetotale~long, data = cham_id)
summary(test1)
## 
## Call:
## lm(formula = feconditetotale ~ long, data = cham_id)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.2395 -1.6491 -0.3901  1.7214  8.5738 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.28163    0.63538   0.443    0.658    
## long         0.25904    0.05098   5.081 1.03e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.66 on 161 degrees of freedom
## Multiple R-squared:  0.1382, Adjusted R-squared:  0.1328 
## F-statistic: 25.81 on 1 and 161 DF,  p-value: 1.031e-06
plot(test1)

hist(resid(test1))

#Non homogeneite des residus

test2 <- lm(feconditetotale~log(long), data = cham_id)
summary(test2)
## 
## Call:
## lm(formula = feconditetotale ~ log(long), data = cham_id)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3236 -1.8056 -0.3497  1.7138  8.6503 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -3.0910     1.2331  -2.507   0.0132 *  
## log(long)     2.6837     0.5079   5.283 4.07e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.645 on 161 degrees of freedom
## Multiple R-squared:  0.1478, Adjusted R-squared:  0.1425 
## F-statistic: 27.91 on 1 and 161 DF,  p-value: 4.066e-07
plot(test2)

#Non homogeneite des residus

test3 <- lm(sqrt(feconditetotale)~long, data = cham_id)
summary(test3)
## 
## Call:
## lm(formula = sqrt(feconditetotale) ~ long, data = cham_id)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.33973 -0.44205  0.02458  0.62207  1.71466 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.86498    0.19205   4.504 1.28e-05 ***
## long         0.06412    0.01541   4.161 5.15e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8041 on 161 degrees of freedom
## Multiple R-squared:  0.09708,    Adjusted R-squared:  0.09148 
## F-statistic: 17.31 on 1 and 161 DF,  p-value: 5.151e-05
plot(test3)

#Non homogeneite des residus

cham_id2 <- cham_id[cham_id$feconditetotale!=0,]
test4 <- lm(log(feconditetotale)~(log(long)), data = cham_id2)
summary(test4)
## 
## Call:
## lm(formula = log(feconditetotale) ~ (log(long)), data = cham_id2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.46036 -0.48276  0.06982  0.58994  1.20898 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.1012     0.3569  -3.086  0.00243 ** 
## log(long)     0.8863     0.1468   6.039 1.24e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6879 on 145 degrees of freedom
## Multiple R-squared:  0.201,  Adjusted R-squared:  0.1955 
## F-statistic: 36.47 on 1 and 145 DF,  p-value: 1.24e-08
plot(test4)

#Non homogeneite des residus

test5 <- lm(log(feconditetotale+1)~(long), data = cham_id2)
summary(test5)
## 
## Call:
## lm(formula = log(feconditetotale + 1) ~ (long), data = cham_id2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.09484 -0.34652 -0.01657  0.41983  0.97945 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.63262    0.13315   4.751 4.82e-06 ***
## long         0.06419    0.01075   5.971 1.74e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5007 on 145 degrees of freedom
## Multiple R-squared:  0.1974, Adjusted R-squared:  0.1918 
## F-statistic: 35.65 on 1 and 145 DF,  p-value: 1.736e-08
plot(test5)

#Non homogeneite des residus

test6 <- lm(fectotrelative~long, data = cham_id)
summary(test6)
## 
## Call:
## lm(formula = fectotrelative ~ long, data = cham_id)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57107 -0.22551  0.01043  0.21283  0.44332 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.5545229  0.0694994   7.979 2.61e-13 ***
## long        0.0007195  0.0055768   0.129    0.898    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.291 on 161 degrees of freedom
## Multiple R-squared:  0.0001034,  Adjusted R-squared:  -0.006107 
## F-statistic: 0.01664 on 1 and 161 DF,  p-value: 0.8975
plot(test6)

#Non homogeneite des residus

test7 <- lm(log(fectotrelative)~long, data = cham_id2)
summary(test7)
## 
## Call:
## lm(formula = log(fectotrelative) ~ long, data = cham_id2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.51798 -0.28228  0.07845  0.31246  0.59766 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.613173   0.113211  -5.416 2.47e-07 ***
## long         0.005171   0.009140   0.566    0.572    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4257 on 145 degrees of freedom
## Multiple R-squared:  0.002203,   Adjusted R-squared:  -0.004678 
## F-statistic: 0.3201 on 1 and 145 DF,  p-value: 0.5724
plot(test7)

test8 <- glm(data=cham_id, feconditetotale~long, family="poisson")
summary(test8)
## 
## Call:
## glm(formula = feconditetotale ~ long, family = "poisson", data = cham_id)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.8849  -1.1120  -0.3623   0.8639   3.3159  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.24317    0.14334   1.697   0.0898 .  
## long         0.07730    0.01046   7.388 1.49e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 390.54  on 162  degrees of freedom
## Residual deviance: 335.96  on 161  degrees of freedom
## AIC: 772.39
## 
## Number of Fisher Scoring iterations: 5
test9 <- glm(data=cham_id, fectotrelative~long, family="quasipoisson")
summary(test9)
## 
## Call:
## glm(formula = fectotrelative ~ long, family = "quasipoisson", 
##     data = cham_id)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.06876  -0.32640   0.01389   0.26683   0.53371  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.589544   0.123663  -4.767 4.15e-06 ***
## long         0.001278   0.009907   0.129    0.898    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 0.1504962)
## 
##     Null deviance: 32.342  on 162  degrees of freedom
## Residual deviance: 32.340  on 161  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 4

6 Lien entre fecondite annuelle et longevite des animaux


6.1 Représentation graphique des données

plot12 <- ggplot(cham2, aes(x=long, y=fec)) +
    geom_count() + 
    labs(title = "Fécondité annuelle en fonction de la longevite",x="Longevite", y="Fécondité annuelle")+
    geom_smooth()+
    theme(plot.title = element_text(hjust = 0.5)); plot12

plot13 <- ggplot(cham2, aes(x=long, y=fec)) +
    geom_jitter(width = 0.55, height=0) + 
    labs(title = "Fécondité annuelle en fonction de la longevite",x="Longevite", y="Fécondité annuelle")+
    geom_smooth()+
    theme(plot.title = element_text(hjust = 0.5)); plot13

6.2 Analyse statistique du lien entre fecondite annuelle et longevite des femelles

6.2.1 Modèles de régression lineaire generalise avec effets aléatoires


6.2.1.1 First

glm11 <- glmer(fec ~ long + (1| id),data=cham2, family = binomial)
summary(glm11)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ long + (1 | id)
##    Data: cham2
## 
##      AIC      BIC   logLik deviance df.resid 
##   1218.0   1232.5   -606.0   1212.0      906 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7916 -1.0634  0.6517  0.7850  1.2047 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.3199   0.5656  
## Number of obs: 909, groups:  id, 163
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.23422    0.29351   0.798    0.425
## long         0.01175    0.02223   0.529    0.597
## 
## Correlation of Fixed Effects:
##      (Intr)
## long -0.956
Anova(glm11)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##       Chisq Df Pr(>Chisq)
## long 0.2793  1     0.5971

Pas de surdispersion. Pas d’effets significatifs observes de la variable “longevite” sur la fecondite annuelle (p value > 0.05)

6.2.1.2 Second => voir avec Karim la pertinence de segmenter en deux la variable

Graphiquement, une courbe concave se dessine donc on segmente la variable “longevite” en deux segments pour tester la relation longevite/fecondite annuelle sur les deux segments.

cham_long1 <- cham2[cham2$long<15,]
cham_long2 <- cham2[cham2$long>=15,]

plot16 <- ggplot(cham_long1, aes(x=long, y=fec)) +
    geom_count() + 
    labs(title = "Fécondité en fonction de la longevite",x="Longevite", y="Fécondité")+
    geom_smooth()+
    theme(plot.title = element_text(hjust = 0.5)); plot16
## Warning: Removed 408 rows containing non-finite values (`stat_sum()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).

glm12 <- glmer(fec ~ long + (1| id),data=cham_long1, family = binomial)
summary(glm12)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ long + (1 | id)
##    Data: cham_long1
## 
##      AIC      BIC   logLik deviance df.resid 
##    789.0    802.2   -391.5    783.0      585 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6864 -1.0755  0.6737  0.8230  1.1503 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.1568   0.3959  
## Number of obs: 588, groups:  id, 120
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -0.66433    0.39918  -1.664  0.09606 . 
## long         0.09835    0.03665   2.684  0.00728 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## long -0.971
Anova(glm12)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##       Chisq Df Pr(>Chisq)   
## long 7.2017  1   0.007283 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot17 <- ggplot(cham_long2, aes(x=long, y=fec)) +
    geom_count() + 
    labs(title = "Fécondité en fonction de la longevite",x="Longevite", y="Fécondité")+
    geom_smooth()+
    theme(plot.title = element_text(hjust = 0.5)); plot17
## Warning: Removed 408 rows containing non-finite values (`stat_sum()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).

glm13 <- glmer(fec ~ long + (1| id),data=cham_long2, family = binomial)
summary(glm13)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: fec ~ long + (1 | id)
##    Data: cham_long2
## 
##      AIC      BIC   logLik deviance df.resid 
##    418.9    430.3   -206.5    412.9      318 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9648 -1.0836  0.5871  0.7880  1.2014 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.3949   0.6284  
## Number of obs: 321, groups:  id, 43
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  4.35885    1.43312   3.041  0.00235 **
## long        -0.23441    0.08423  -2.783  0.00539 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##      (Intr)
## long -0.994
Anova(glm13)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: fec
##       Chisq Df Pr(>Chisq)   
## long 7.7448  1   0.005387 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7 Lien entre fecondite totale et poids


7.1 Représentation graphique des données

7.1.1 Vérification de la comparabilite des poids selon les ages de capture

plot18 <- ggplot(cham_id, aes(x=AgePds, y=pds)) + 
    geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) + 
    labs(title = "Poids selon l'age de capture",x="Age de capture", y="Poids mesuré")+
    theme(plot.title = element_text(hjust = 0.5))+
    geom_smooth();plot18

On observe un impact fort de l’age sur le poids pour les ages faibles. On retire les outliers et on conserve les donnees situees dans l’intervalle de confiance à 99%.

cham_pds <- cham_id[which(cham_id$pds!="NA"),]
borninf = mean(cham_pds$pds)-3*(sd(cham_pds$pds)/sqrt(length(cham_pds$pds)))
borninf
## [1] 20.0376
bornsup = mean(cham_pds$pds)+3*(sd(cham_pds$pds)/sqrt(length(cham_pds$pds)))
bornsup
## [1] 22.11812
?sd
sqrt(length(cham_pds$pds))
## [1] 11.44552
cham_pds <- cham_pds%>%
  filter(pds<=22.11812&pds>=20.0376)

plot18 <- ggplot(cham_pds, aes(x=AgePds, y=pds)) + 
    geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) + 
    geom_smooth();plot18

7.1.2 Représentation graphique de la fecondite totale en fonction du poids

plot19 <- ggplot(cham_pds, aes(x=pds, y=feconditetotale)) + 
    geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) + 
    geom_smooth();plot19

7.1.3 Représentation graphique de la longevite en fonction du poids

plot20 <- ggplot(cham_pds, aes(x=pds, y=long)) + 
    geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) + 
    geom_smooth();plot20

7.2 Analyse statistique du lien entre fecondite annuelle/longevite et poids des femelles

7.2.1 Modèles de régression lineaire generalise avec effets aléatoires


7.2.1.1 First

lm14 <- lm(feconditetotale ~ pds,data=cham_pds)
summary(lm14)
## 
## Call:
## lm(formula = feconditetotale ~ pds, data = cham_pds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4002 -2.1519 -0.6555  1.4273  9.5998 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  20.7763    23.0128   0.903    0.374
## pds          -0.8274     1.0838  -0.763    0.451
## 
## Residual standard error: 3.059 on 31 degrees of freedom
## Multiple R-squared:  0.01845,    Adjusted R-squared:  -0.01321 
## F-statistic: 0.5828 on 1 and 31 DF,  p-value: 0.451
lm15 <- lm(long ~ pds,data=cham_pds)
summary(lm15)
## 
## Call:
## lm(formula = long ~ pds, data = cham_pds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2055 -2.7518 -0.6201  1.9458  8.7945 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -49.209     27.773  -1.772   0.0863 .
## pds            2.829      1.308   2.163   0.0384 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.692 on 31 degrees of freedom
## Multiple R-squared:  0.1311, Adjusted R-squared:  0.1031 
## F-statistic: 4.679 on 1 and 31 DF,  p-value: 0.03837

Impact de la longevite significatif sur le poids mais pas de la fecondite totale.